{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import libraries and genome\n",
    "import pandas as pd\n",
    "from Bio import SeqIO\n",
    "\n",
    "my_seqlist = []\n",
    "for seq_record in SeqIO.parse('genome/Mus_musculus.GRCm38.chromosome.1.fa', 'fasta'):\n",
    "    my_seqlist.append(seq_record)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total number of seqs\n",
      "66\n",
      "ID seq 1\n",
      "1\n",
      "First 100 bp seq 1\n",
      "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n",
      "Total length seq 1\n",
      "195471971\n"
     ]
    }
   ],
   "source": [
    "print(\"Total number of seqs\")\n",
    "print(len(my_seqlist))\n",
    "print(\"ID seq 1\")\n",
    "print(my_seqlist[0].id)\n",
    "print(\"First 100 bp seq 1\")\n",
    "print(my_seqlist[0].seq[0:100])\n",
    "print(\"Total length seq 1\")\n",
    "print(len(my_seqlist[0]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "256"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Obtain CG kmers list\n",
    "from itertools import product\n",
    "\n",
    "kmers = list(product('ATCG', repeat=6))\n",
    "kmers = [\"\".join(x) for x in kmers]\n",
    "\n",
    "kmers_CG = []\n",
    "for i in kmers:\n",
    "    if i[2:4] == \"CG\":\n",
    "        kmers_CG.append(i)\n",
    "\n",
    "len(kmers_CG)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dictionary key location\n",
    "dict_chr_loc = {}\n",
    "for i in range(len(my_seqlist)):\n",
    "    dict_chr_loc[my_seqlist[i].id] = i"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load filenames\n",
    "from os import listdir\n",
    "\n",
    "files = listdir(\"Claudia\")\n",
    "inputs = []\n",
    "for i in range(len(files)):\n",
    "    if files[i].endswith(\"deduplicated.txt\") == True:\n",
    "        inputs.append(files[i])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "def kmer_analysis(file_name,folder_input,folder_output):    \n",
    "    file_input = str(folder_input) + \"/\"+ str(file_name)\n",
    "    print(file_input)\n",
    "    df = pd.read_csv(file_input,sep='\\t',skiprows=1,header=None)\n",
    "\n",
    "    pd_kmers_CG = pd.DataFrame(0, index=kmers_CG, columns=['Met','Unmet'])\n",
    "\n",
    "    for i in range(len(df)):\n",
    "        position = my_seqlist[dict_chr_loc.get(df[2][i])].seq[(df[3][i])-1]\n",
    "        if position == \"C\":\n",
    "            substring = my_seqlist[dict_chr_loc.get(df[2][i])].seq[(df[3][i])-3:df[3][i]+3]\n",
    "        elif position ==\"G\":\n",
    "            substring = (my_seqlist[dict_chr_loc.get(df[2][i])].seq[(df[3][i])-4:df[3][i]+2]).reverse_complement()\n",
    "        if (substring[2:4] == \"CG\") & (str(substring) in kmers_CG):\n",
    "            if df[4][i] == \"Z\":\n",
    "                pd_kmers_CG.loc[str(substring)][0] = (pd_kmers_CG.loc[str(substring)][0]) + 1\n",
    "            else:\n",
    "                pd_kmers_CG.loc[str(substring)][1] = (pd_kmers_CG.loc[str(substring)][1]) + 1\n",
    "\n",
    "    pd_kmers_CG['Total'] = pd_kmers_CG.sum(axis=1)\n",
    "    pd_kmers_CG['Per_met'] = pd_kmers_CG['Met']*100/pd_kmers_CG['Total']\n",
    "    file_output = str(folder_output) + \"/\" + str(file_name) + \".csv\"\n",
    "    print(file_output)\n",
    "    pd_kmers_CG.to_csv(file_output)   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Claudia/CpG_context_50ugmL_Asc_2i_A_0h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmL_Asc_2i_A_0h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmL_Asc_2i_A_12_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmL_Asc_2i_A_12_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmL_Asc_2i_A_18h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmL_Asc_2i_A_18h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmL_Asc_2i_A_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmL_Asc_2i_A_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmL_Asc_2i_A_30h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmL_Asc_2i_A_30h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmL_Asc_2i_A_36h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmL_Asc_2i_A_36h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmL_Asc_2i_A_42h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmL_Asc_2i_A_42h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmL_Asc_2i_A_6h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmL_Asc_2i_A_6h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmL_Asc_2i_B_0h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmL_Asc_2i_B_0h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmL_Asc_2i_B_12_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmL_Asc_2i_B_12_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmL_Asc_2i_B_18h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmL_Asc_2i_B_18h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmL_Asc_2i_B_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmL_Asc_2i_B_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmL_Asc_2i_B_30h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmL_Asc_2i_B_30h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmL_Asc_2i_B_36h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmL_Asc_2i_B_36h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmL_Asc_2i_B_42h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmL_Asc_2i_B_42h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmL_Asc_2i_B_6h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmL_Asc_2i_B_6h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmL_Asc_2i_C_0h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmL_Asc_2i_C_0h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmL_Asc_2i_C_12_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmL_Asc_2i_C_12_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmL_Asc_2i_C_18h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmL_Asc_2i_C_18h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmL_Asc_2i_C_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmL_Asc_2i_C_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmL_Asc_2i_C_30h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmL_Asc_2i_C_30h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmL_Asc_2i_C_36h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmL_Asc_2i_C_36h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmL_Asc_2i_C_42h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmL_Asc_2i_C_42h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmL_Asc_2i_C_6h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmL_Asc_2i_C_6h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_A_0h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_A_0h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_A_12h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_A_12h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_A_18h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_A_18h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_A_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_A_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_A_30h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_A_30h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_A_36h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_A_36h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_A_42h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_A_42h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_A_6h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_A_6h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_B_0h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_B_0h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_B_12h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_B_12h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_B_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_B_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_B_30h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_B_30h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_B_42h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_B_42h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_B_6h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_B_6h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_C_0h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_C_0h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_C_12h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_C_12h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_C_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_C_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_C_30h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_C_30h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_C_42h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_C_42h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_C_6h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_C_6h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_DOX_2i_A_0h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_DOX_2i_A_0h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_DOX_2i_A_12h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_DOX_2i_A_12h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_DOX_2i_A_18_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_DOX_2i_A_18_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_DOX_2i_A_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_DOX_2i_A_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_DOX_2i_A_30h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_DOX_2i_A_30h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_DOX_2i_A_36h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_DOX_2i_A_36h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_DOX_2i_A_42h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_DOX_2i_A_42h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_DOX_2i_A_6h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_DOX_2i_A_6h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_DOX_2i_B_0h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_DOX_2i_B_0h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_DOX_2i_B_12h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_DOX_2i_B_12h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_DOX_2i_B_18_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_DOX_2i_B_18_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_DOX_2i_B_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_DOX_2i_B_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_DOX_2i_B_30h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_DOX_2i_B_30h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_DOX_2i_B_36h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_DOX_2i_B_36h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_DOX_2i_B_42h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_DOX_2i_B_42h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_DOX_2i_B_6h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_DOX_2i_B_6h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_DOX_2i_C_0h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_DOX_2i_C_0h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_DOX_2i_C_12h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_DOX_2i_C_12h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_DOX_2i_C_18_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_DOX_2i_C_18_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_DOX_2i_C_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/oscarortega/anaconda/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2821: DtypeWarning: Columns (2) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  if self.run_code(code, result):\n"
     ]
    },
    {
     "ename": "TypeError",
     "evalue": "list indices must be integers or slices, not NoneType",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-12-7776d0d226ca>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m     \u001b[0mkmer_analysis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\"Claudia\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\"Claudia_output\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m<ipython-input-11-648987ef42f2>\u001b[0m in \u001b[0;36mkmer_analysis\u001b[0;34m(file_name, folder_input, folder_output)\u001b[0m\n\u001b[1;32m      7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      8\u001b[0m     \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m         \u001b[0mposition\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmy_seqlist\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdict_chr_loc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mseq\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     10\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mposition\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"C\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     11\u001b[0m             \u001b[0msubstring\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmy_seqlist\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdict_chr_loc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mseq\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mTypeError\u001b[0m: list indices must be integers or slices, not NoneType"
     ]
    }
   ],
   "source": [
    "for i in range(len(inputs)):\n",
    "    kmer_analysis(inputs[i],\"Claudia\",\"Claudia_output\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "inputs_2 = inputs[64:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Claudia/CpG_context_50ugmLAsc_DOX_2i_C_30h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_DOX_2i_C_30h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_DOX_2i_C_36h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_DOX_2i_C_36h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_DOX_2i_C_42h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_DOX_2i_C_42h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_DOX_2i_C_6h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_DOX_2i_C_6h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_Dox_A_0h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_Dox_A_0h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_Dox_A_12h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_Dox_A_12h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_Dox_A_18h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_Dox_A_18h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_Dox_A_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_Dox_A_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_Dox_A_30h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_Dox_A_30h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_Dox_A_36h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_Dox_A_36h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_Dox_A_6h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_Dox_A_6h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_Dox_B_0h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_Dox_B_0h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_Dox_B_12h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_Dox_B_12h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_Dox_B_18h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_Dox_B_18h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_Dox_B_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_Dox_B_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_Dox_B_30h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_Dox_B_30h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_Dox_B_36h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_Dox_B_36h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_Dox_B_42h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_Dox_B_42h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_Dox_B_6h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_Dox_B_6h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_Dox_C_0h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_Dox_C_0h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_Dox_C_12h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_Dox_C_12h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_Dox_C_18h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_Dox_C_18h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_Dox_C_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_Dox_C_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_Dox_C_30h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_Dox_C_30h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_Dox_C_36h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_Dox_C_36h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_Dox_C_42h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_Dox_C_42h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_50ugmLAsc_Dox_C_6h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_50ugmLAsc_Dox_C_6h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmL_Asc_2i_A_48h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmL_Asc_2i_A_48h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmL_Asc_2i_A_54h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmL_Asc_2i_A_54h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmL_Asc_2i_A_60h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmL_Asc_2i_A_60h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmL_Asc_2i_A_66h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmL_Asc_2i_A_66h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmL_Asc_2i_A_72h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmL_Asc_2i_A_72h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmL_Asc_2i_A_72h_serum_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmL_Asc_2i_A_72h_serum_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmL_Asc_2i_A_78h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmL_Asc_2i_A_78h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmL_Asc_2i_A_84h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmL_Asc_2i_A_84h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmL_Asc_2i_B_48h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmL_Asc_2i_B_48h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmL_Asc_2i_B_54h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmL_Asc_2i_B_54h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmL_Asc_2i_B_60h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmL_Asc_2i_B_60h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmL_Asc_2i_B_66h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmL_Asc_2i_B_66h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmL_Asc_2i_B_72h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmL_Asc_2i_B_72h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmL_Asc_2i_B_72h_serum_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmL_Asc_2i_B_72h_serum_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmL_Asc_2i_B_78h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmL_Asc_2i_B_78h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmL_Asc_2i_B_84h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmL_Asc_2i_B_84h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmL_Asc_2i_C_48h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmL_Asc_2i_C_48h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmL_Asc_2i_C_54h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmL_Asc_2i_C_54h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmL_Asc_2i_C_60h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmL_Asc_2i_C_60h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmL_Asc_2i_C_66h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmL_Asc_2i_C_66h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmL_Asc_2i_C_72h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmL_Asc_2i_C_72h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmL_Asc_2i_C_72h_serum_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmL_Asc_2i_C_72h_serum_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmL_Asc_2i_C_78h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmL_Asc_2i_C_78h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmL_Asc_2i_C_84h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmL_Asc_2i_C_84h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_A_48h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_A_48h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_A_54h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_A_54h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_A_60h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_A_60h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_A_66h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_A_66h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_A_72h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_A_72h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_A_72h_serum_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_A_72h_serum_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_A_78h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_A_78h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_A_84h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_A_84h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_B_48h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_B_48h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_B_54h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_B_54h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_B_60h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_B_60h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_B_66h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_B_66h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_B_72h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_B_72h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_B_72h_serum_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_B_72h_serum_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_B_78h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_B_78h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_B_84h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_B_84h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_C_48h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_C_48h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_C_54h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_C_54h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_C_60h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_C_60h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_C_66h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_C_66h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_C_72h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_C_72h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_C_72h_serum_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_C_72h_serum_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_C_78h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_C_78h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_C_84h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_C_84h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_DOX_2i_A_48h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_DOX_2i_A_48h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_DOX_2i_A_54h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_DOX_2i_A_54h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_DOX_2i_A_60h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_DOX_2i_A_60h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_DOX_2i_A_66h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_DOX_2i_A_66h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_DOX_2i_A_72h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_DOX_2i_A_72h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_DOX_2i_A_72h_serum_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_DOX_2i_A_72h_serum_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_DOX_2i_A_78h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_DOX_2i_A_78h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_DOX_2i_A_84h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_DOX_2i_A_84h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_DOX_2i_B_48h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_DOX_2i_B_48h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_DOX_2i_B_54h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_DOX_2i_B_54h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_DOX_2i_B_60h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_DOX_2i_B_60h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_DOX_2i_B_66h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_DOX_2i_B_66h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_DOX_2i_B_72h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_DOX_2i_B_72h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_DOX_2i_B_72h_serum_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_DOX_2i_B_72h_serum_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_DOX_2i_B_78h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_DOX_2i_B_78h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_DOX_2i_B_84h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_DOX_2i_B_84h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_DOX_2i_C_48h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_DOX_2i_C_48h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_DOX_2i_C_54h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_DOX_2i_C_54h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_DOX_2i_C_60h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_DOX_2i_C_60h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_DOX_2i_C_66h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_DOX_2i_C_66h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_DOX_2i_C_72h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_DOX_2i_C_72h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_DOX_2i_C_72h_serum_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_DOX_2i_C_72h_serum_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_DOX_2i_C_78h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_DOX_2i_C_78h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_DOX_2i_C_84h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_DOX_2i_C_84h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_Dox_A_48h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_Dox_A_48h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_Dox_A_54h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_Dox_A_54h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_Dox_A_60h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_Dox_A_60h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_Dox_A_66h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_Dox_A_66h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_Dox_A_72h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_Dox_A_72h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_Dox_A_72h_serum_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_Dox_A_72h_serum_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_Dox_A_78h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_Dox_A_78h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_Dox_A_84h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_Dox_A_84h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_Dox_B_48h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_Dox_B_48h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_Dox_B_54h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_Dox_B_54h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_Dox_B_60h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_Dox_B_60h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_Dox_B_66h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_Dox_B_66h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_Dox_B_72h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_Dox_B_72h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_Dox_B_72h_serum_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_Dox_B_72h_serum_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_Dox_B_78h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_Dox_B_78h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_Dox_B_84h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_Dox_B_84h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_Dox_C_48h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_Dox_C_48h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_Dox_C_54h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_Dox_C_54h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_Dox_C_60h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_Dox_C_60h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_Dox_C_66h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_Dox_C_66h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_Dox_C_72h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_Dox_C_72h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_Dox_C_72h_serum_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_Dox_C_72h_serum_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_Dox_C_78h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_Dox_C_78h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Claudia/CpG_context_iSeq008_50ugmLAsc_Dox_C_84h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Claudia_output/CpG_context_iSeq008_50ugmLAsc_Dox_C_84h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n"
     ]
    }
   ],
   "source": [
    "for i in range(len(inputs_2)):\n",
    "    kmer_analysis(inputs_2[i],\"Claudia\",\"Claudia_output\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Claudia/CpG_context_50ugmLAsc_DOX_2i_C_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load filenames\n",
    "from os import listdir\n",
    "\n",
    "files = listdir(\"Rosie\")\n",
    "inputs = []\n",
    "for i in range(len(files)):\n",
    "    if files[i].endswith(\"deduplicated.txt\") == True:\n",
    "        inputs.append(files[i])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Rosie/CpG_context_iSeq009_TET_TKO_5AZA_0h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_TET_TKO_5AZA_0h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_TET_TKO_5AZA_0h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_TET_TKO_5AZA_0h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_TET_TKO_5AZA_0h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_TET_TKO_5AZA_0h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_TET_TKO_5AZA_12h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_TET_TKO_5AZA_12h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_TET_TKO_5AZA_12h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_TET_TKO_5AZA_12h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_TET_TKO_5AZA_12h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_TET_TKO_5AZA_12h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_TET_TKO_5AZA_18h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_TET_TKO_5AZA_18h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_TET_TKO_5AZA_18h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_TET_TKO_5AZA_18h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_TET_TKO_5AZA_18h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_TET_TKO_5AZA_18h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_TET_TKO_5AZA_6h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_TET_TKO_5AZA_6h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_TET_TKO_5AZA_6h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_TET_TKO_5AZA_6h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_TET_TKO_5AZA_6h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_TET_TKO_5AZA_6h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_5AZA_0h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_5AZA_0h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_5AZA_0h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_5AZA_0h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_5AZA_0h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_5AZA_0h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_5AZA_12h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_5AZA_12h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_5AZA_12h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_5AZA_12h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_5AZA_12h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_5AZA_12h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_5AZA_18h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_5AZA_18h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_5AZA_18h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_5AZA_18h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_5AZA_18h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_5AZA_18h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_5AZA_24h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_5AZA_24h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_5AZA_24h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_5AZA_24h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_5AZA_24h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_5AZA_24h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_5AZA_30h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_5AZA_30h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_5AZA_30h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_5AZA_30h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_5AZA_30h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_5AZA_30h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_5AZA_36h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_5AZA_36h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_5AZA_36h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_5AZA_36h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_5AZA_36h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_5AZA_36h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_5AZA_42h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_5AZA_42h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_5AZA_42h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_5AZA_42h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_5AZA_42h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_5AZA_42h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_5AZA_48h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_5AZA_48h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_5AZA_48h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_5AZA_48h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_5AZA_48h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_5AZA_48h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_5AZA_6h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_5AZA_6h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_5AZA_6h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_5AZA_6h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_5AZA_6h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_5AZA_6h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_negative_0h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_negative_0h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_negative_0h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_negative_0h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_negative_0h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_negative_0h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_negative_12h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_negative_12h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_negative_12h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_negative_12h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_negative_12h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_negative_12h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_negative_18h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_negative_18h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_negative_18h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_negative_18h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_negative_18h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_negative_18h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_negative_24h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_negative_24h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_negative_24h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_negative_24h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_negative_24h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_negative_24h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_negative_30h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_negative_30h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_negative_30h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_negative_30h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_negative_30h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_negative_30h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_negative_36h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_negative_36h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_negative_36h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_negative_36h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_negative_36h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_negative_36h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_negative_42h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_negative_42h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_negative_42h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_negative_42h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_negative_42h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_negative_42h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_negative_48h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_negative_48h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_negative_48h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_negative_48h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_negative_48h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_negative_48h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_negative_6h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_negative_6h_R1_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_negative_6h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_negative_6h_R2_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n",
      "Rosie/CpG_context_iSeq009_V65_negative_6h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt\n",
      "Rosie_output/CpG_context_iSeq009_V65_negative_6h_R3_RG_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n"
     ]
    }
   ],
   "source": [
    "for i in range(len(inputs)):\n",
    "    kmer_analysis(inputs[i],\"Rosie\",\"Rosie_output\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load filenames\n",
    "from os import listdir\n",
    "\n",
    "files = listdir(\"Claudia\")\n",
    "inputs = []\n",
    "for i in range(len(files)):\n",
    "    if files[i].endswith(\"deduplicated.txt\") == True:\n",
    "        inputs.append(files[i])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Claudia/CpG_context_50ugmLAsc_DOX_2i_C_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'CpG_context_50ugmLAsc_DOX_2i_C_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt'"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "inputs_3 = inputs[63]\n",
    "inputs[63]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [],
   "source": [
    "#kmer_analysis(inputs_3,\"Claudia\",\"Claudia_output\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [],
   "source": [
    "file_name = 'CpG_context_50ugmLAsc_DOX_2i_C_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt'\n",
    "folder_input = 'Claudia'\n",
    "folder_output = 'Claudia_output'\n",
    "file_input =str(folder_input) + \"/\"+ str(file_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(file_input,sep='\\t',skiprows=1,header=None,low_memory=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1441894</th>\n",
       "      <td>7001326F:234:CE7THANXX:8:2316:20830:100933_1:N...</td>\n",
       "      <td>+</td>\n",
       "      <td>8</td>\n",
       "      <td>12489527</td>\n",
       "      <td>Z</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1441895</th>\n",
       "      <td>7001326F:234:CE7THANXX:8:2316:7325:101029_1:N:...</td>\n",
       "      <td>-</td>\n",
       "      <td>1</td>\n",
       "      <td>38362085</td>\n",
       "      <td>z</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1441896</th>\n",
       "      <td>7001326F:234:CE7THANXX:8:2316:7325:101029_1:N:...</td>\n",
       "      <td>-</td>\n",
       "      <td>1</td>\n",
       "      <td>38362100</td>\n",
       "      <td>z</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1441897</th>\n",
       "      <td>7001326F:234:CE7THANXX:8:2316:12384:101214_1:N...</td>\n",
       "      <td>+</td>\n",
       "      <td>2</td>\n",
       "      <td>39392801</td>\n",
       "      <td>Z</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1441898</th>\n",
       "      <td>7001326F:234:CE7THANXX:8:2316:18799:101331_1:N...</td>\n",
       "      <td>-</td>\n",
       "      <td>18</td>\n",
       "      <td>36308357</td>\n",
       "      <td>z</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                         0  1   2         3  4\n",
       "1441894  7001326F:234:CE7THANXX:8:2316:20830:100933_1:N...  +   8  12489527  Z\n",
       "1441895  7001326F:234:CE7THANXX:8:2316:7325:101029_1:N:...  -   1  38362085  z\n",
       "1441896  7001326F:234:CE7THANXX:8:2316:7325:101029_1:N:...  -   1  38362100  z\n",
       "1441897  7001326F:234:CE7THANXX:8:2316:12384:101214_1:N...  +   2  39392801  Z\n",
       "1441898  7001326F:234:CE7THANXX:8:2316:18799:101331_1:N...  -  18  36308357  z"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.tail(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Claudia_ouput/CpG_context_50ugmLAsc_DOX_2i_C_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv\n"
     ]
    },
    {
     "ename": "FileNotFoundError",
     "evalue": "[Errno 2] No such file or directory: 'Claudia_ouput/CpG_context_50ugmLAsc_DOX_2i_C_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-75-c6b11a293207>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     17\u001b[0m \u001b[0mfile_output\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfolder_output\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"/\"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile_name\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\".csv\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     18\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile_output\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 19\u001b[0;31m \u001b[0mpd_kmers_CG\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile_output\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m/Users/oscarortega/anaconda/lib/python3.6/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mto_csv\u001b[0;34m(self, path_or_buf, sep, na_rep, float_format, columns, header, index, index_label, mode, encoding, compression, quoting, quotechar, line_terminator, chunksize, tupleize_cols, date_format, doublequote, escapechar, decimal)\u001b[0m\n\u001b[1;32m   1743\u001b[0m                                  \u001b[0mdoublequote\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdoublequote\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1744\u001b[0m                                  escapechar=escapechar, decimal=decimal)\n\u001b[0;32m-> 1745\u001b[0;31m         \u001b[0mformatter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1746\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1747\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mpath_or_buf\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Users/oscarortega/anaconda/lib/python3.6/site-packages/pandas/io/formats/csvs.py\u001b[0m in \u001b[0;36msave\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    154\u001b[0m             f, handles = _get_handle(self.path_or_buf, self.mode,\n\u001b[1;32m    155\u001b[0m                                      \u001b[0mencoding\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mencoding\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 156\u001b[0;31m                                      compression=self.compression)\n\u001b[0m\u001b[1;32m    157\u001b[0m             \u001b[0mclose\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    158\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Users/oscarortega/anaconda/lib/python3.6/site-packages/pandas/io/common.py\u001b[0m in \u001b[0;36m_get_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text)\u001b[0m\n\u001b[1;32m    398\u001b[0m         \u001b[0;32melif\u001b[0m \u001b[0mencoding\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    399\u001b[0m             \u001b[0;31m# Python 3 and encoding\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 400\u001b[0;31m             \u001b[0mf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_buf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoding\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mencoding\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    401\u001b[0m         \u001b[0;32melif\u001b[0m \u001b[0mis_text\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    402\u001b[0m             \u001b[0;31m# Python 3 and no explicit encoding\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'Claudia_ouput/CpG_context_50ugmLAsc_DOX_2i_C_24h_fastq.gz_trimmed_bismark_bt2.deduplicated.txt.csv'"
     ]
    }
   ],
   "source": [
    "pd_kmers_CG = pd.DataFrame(0, index=kmers_CG, columns=['Met','Unmet'])\n",
    "\n",
    "for i in range(len(df)):\n",
    "    position = my_seqlist[dict_chr_loc.get(df[2][i])].seq[(df[3][i])-1]\n",
    "    if position == \"C\":\n",
    "        substring = my_seqlist[dict_chr_loc.get(df[2][i])].seq[(df[3][i])-3:df[3][i]+3]\n",
    "    elif position ==\"G\":\n",
    "        substring = (my_seqlist[dict_chr_loc.get(df[2][i])].seq[(df[3][i])-4:df[3][i]+2]).reverse_complement()\n",
    "    if (substring[2:4] == \"CG\") & (str(substring) in kmers_CG):\n",
    "        if df[4][i] == \"Z\":\n",
    "            pd_kmers_CG.loc[str(substring)][0] = (pd_kmers_CG.loc[str(substring)][0]) + 1\n",
    "        else:\n",
    "            pd_kmers_CG.loc[str(substring)][1] = (pd_kmers_CG.loc[str(substring)][1]) + 1\n",
    "\n",
    "pd_kmers_CG['Total'] = pd_kmers_CG.sum(axis=1)\n",
    "pd_kmers_CG['Per_met'] = pd_kmers_CG['Met']*100/pd_kmers_CG['Total']\n",
    "file_output = str(folder_output) + \"/\" + str(file_name) + \".csv\"\n",
    "print(file_output)\n",
    "pd_kmers_CG.to_csv(file_output)   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
